{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:00:40.980089Z",
     "start_time": "2018-08-02T16:00:36.873385Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n",
      "/usr/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n",
      "/usr/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n"
     ]
    }
   ],
   "source": [
    "import os, sys, codecs, gc\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%pylab inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:02:38.425528Z",
     "start_time": "2018-08-02T16:00:49.631011Z"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:02:58.565717Z",
     "start_time": "2018-08-02T16:02:45.484774Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Load only the 'api' column and encode each distinct value as an integer id.\n",
    "train = pd.read_csv('../input/train.csv', usecols=['api'])\n",
    "api_value = train['api'].unique()\n",
    "\n",
    "# Ids are assigned in the order the values are returned by unique().\n",
    "api_dict = {api: i for i, api in enumerate(api_value)}\n",
    "\n",
    "# Series.map with a dict performs the lookup in vectorized form instead of\n",
    "# making one Python-level lambda call per row.\n",
    "train_api = train['api'].map(api_dict)\n",
    "del train; gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:08:11.807471Z",
     "start_time": "2018-08-02T16:06:12.747276Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Read the remaining columns with explicit dtypes (the same mapping used by the\n",
    "# later typed reload of this file) so the frame is not stored as all-int64.\n",
    "# return_value stays int64 because its observed range does not fit in int32.\n",
    "train = pd.read_csv('../input/train.csv', usecols=['file_id', 'label', 'tid', 'return_value', 'index'],\n",
    "    dtype = {'file_id': np.int32, 'label':np.int32, 'tid':np.int32, 'return_value':np.int64, 'index':np.int32})\n",
    "# Attach the integer-encoded api ids computed in the earlier cell.\n",
    "train['api'] = train_api"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:13:28.236221Z",
     "start_time": "2018-08-02T16:13:28.227572Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'train_api' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m-------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-22-c21a28b422bc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mdel\u001b[0m \u001b[0mtrain_api\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'train_api' is not defined"
     ]
    }
   ],
   "source": [
    "# Drop the standalone encoded series. The guard keeps this cell re-runnable:\n",
    "# a bare `del` raises NameError once the name has already been removed.\n",
    "if 'train_api' in globals():\n",
    "    del train_api"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:13:18.963582Z",
     "start_time": "2018-08-02T16:13:18.913563Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "92"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run a full garbage-collection pass; the displayed value is the number of\n",
    "# unreachable objects the collector found.\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:39:11.937170Z",
     "start_time": "2018-08-02T16:38:52.720406Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "file_id 116623 0 int32\n",
      "label 5 0 int32\n",
      "tid 55000 100 int32\n",
      "return_value 8793823018752 -2147483643 int64\n",
      "index 5000 0 int32\n"
     ]
    }
   ],
   "source": [
    "# Print the max, min and storage dtype of every column in train.\n",
    "for name, values in train.items():\n",
    "    print(name, values.max(), values.min(), values.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:15:29.967652Z",
     "start_time": "2018-08-02T16:13:31.193289Z"
    }
   },
   "outputs": [],
   "source": [
    "# Column -> dtype mapping; its keys double as the list of columns to load.\n",
    "column_types = {\n",
    "    'file_id': np.int32,\n",
    "    'label': np.int32,\n",
    "    'tid': np.int32,\n",
    "    'return_value': np.int64,\n",
    "    'index': np.int32,\n",
    "}\n",
    "train = pd.read_csv('../input/train.csv', usecols=list(column_types), dtype=column_types)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:17:21.603296Z",
     "start_time": "2018-08-02T16:17:20.260419Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>label</th>\n",
       "      <th>tid</th>\n",
       "      <th>return_value</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1806073</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>13369348</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806074</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806075</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806076</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806077</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>12248408</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806078</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>12248688</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806079</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806080</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806081</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806082</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806083</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806084</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806085</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>7405696</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806086</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>7405732</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806087</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>3514368</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806088</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>3221225524</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806089</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>3221225480</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806090</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806091</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806092</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806093</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806094</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>3221225781</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806095</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>3221225781</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806096</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806097</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806098</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806099</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806100</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806101</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1806102</th>\n",
       "      <td>503</td>\n",
       "      <td>1</td>\n",
       "      <td>2700</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617768</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>364</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617769</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617770</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>366</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617771</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617772</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617773</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>369</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617774</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617775</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617776</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617777</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617778</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>374</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617779</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617780</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>2</td>\n",
       "      <td>376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617781</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>377</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617782</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617783</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617784</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>2</td>\n",
       "      <td>380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617785</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>381</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617786</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>32775</td>\n",
       "      <td>382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617787</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617788</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617789</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>5</td>\n",
       "      <td>385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617790</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>32775</td>\n",
       "      <td>386</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617791</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617792</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617793</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>5</td>\n",
       "      <td>389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617794</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>111</td>\n",
       "      <td>390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617795</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617796</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405617797</th>\n",
       "      <td>115469</td>\n",
       "      <td>1</td>\n",
       "      <td>2732</td>\n",
       "      <td>0</td>\n",
       "      <td>393</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1369564 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           file_id  label   tid  return_value  index\n",
       "1806073        503      1  2700      13369348      0\n",
       "1806074        503      1  2700             0      1\n",
       "1806075        503      1  2700             1      2\n",
       "1806076        503      1  2700             0      3\n",
       "1806077        503      1  2700      12248408      4\n",
       "1806078        503      1  2700      12248688      5\n",
       "1806079        503      1  2700             1      6\n",
       "1806080        503      1  2700             0      7\n",
       "1806081        503      1  2700             0      8\n",
       "1806082        503      1  2700             0      9\n",
       "1806083        503      1  2700             0     10\n",
       "1806084        503      1  2700             0     11\n",
       "1806085        503      1  2700       7405696     12\n",
       "1806086        503      1  2700       7405732     13\n",
       "1806087        503      1  2700       3514368     14\n",
       "1806088        503      1  2700    3221225524     15\n",
       "1806089        503      1  2700    3221225480     16\n",
       "1806090        503      1  2700             0     17\n",
       "1806091        503      1  2700             0     18\n",
       "1806092        503      1  2700             0     19\n",
       "1806093        503      1  2700             0     20\n",
       "1806094        503      1  2700    3221225781     21\n",
       "1806095        503      1  2700    3221225781     22\n",
       "1806096        503      1  2700             0     23\n",
       "1806097        503      1  2700             0     24\n",
       "1806098        503      1  2700             0     25\n",
       "1806099        503      1  2700             0     26\n",
       "1806100        503      1  2700             0     27\n",
       "1806101        503      1  2700             0     28\n",
       "1806102        503      1  2700             0     29\n",
       "...            ...    ...   ...           ...    ...\n",
       "405617768   115469      1  2732             0    364\n",
       "405617769   115469      1  2732             0    365\n",
       "405617770   115469      1  2732             0    366\n",
       "405617771   115469      1  2732             0    367\n",
       "405617772   115469      1  2732             0    368\n",
       "405617773   115469      1  2732             0    369\n",
       "405617774   115469      1  2732             0    370\n",
       "405617775   115469      1  2732             0    371\n",
       "405617776   115469      1  2732             0    372\n",
       "405617777   115469      1  2732             0    373\n",
       "405617778   115469      1  2732             0    374\n",
       "405617779   115469      1  2732             0    375\n",
       "405617780   115469      1  2732             2    376\n",
       "405617781   115469      1  2732             0    377\n",
       "405617782   115469      1  2732             0    378\n",
       "405617783   115469      1  2732             0    379\n",
       "405617784   115469      1  2732             2    380\n",
       "405617785   115469      1  2732             0    381\n",
       "405617786   115469      1  2732         32775    382\n",
       "405617787   115469      1  2732             0    383\n",
       "405617788   115469      1  2732             0    384\n",
       "405617789   115469      1  2732             5    385\n",
       "405617790   115469      1  2732         32775    386\n",
       "405617791   115469      1  2732             0    387\n",
       "405617792   115469      1  2732             0    388\n",
       "405617793   115469      1  2732             5    389\n",
       "405617794   115469      1  2732           111    390\n",
       "405617795   115469      1  2732             0    391\n",
       "405617796   115469      1  2732             0    392\n",
       "405617797   115469      1  2732             0    393\n",
       "\n",
       "[1369564 rows x 5 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the rows whose label equals 1.\n",
    "train.loc[train['label'].eq(1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:20:24.854393Z",
     "start_time": "2018-08-02T16:20:21.968174Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    370157119\n",
       "5     25688289\n",
       "3      6108264\n",
       "2      5964169\n",
       "1      1369564\n",
       "4       343644\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row count for each label value, most frequent first.\n",
    "train.label.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 标签与其他字段的关系\n",
    "\n",
    "#### 基于label的统计，从file_id角度\n",
    "\n",
    "- file_id与api的关系\n",
    "- file_id与tid的关系\n",
    "- file_id与return_value的关系\n",
    "\n",
    "#### 返回值的角度\n",
    "\n",
    "#### 调用顺序的角度\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:42:12.509356Z",
     "start_time": "2018-08-02T16:42:12.252677Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>label</th>\n",
       "      <th>tid</th>\n",
       "      <th>return_value</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>1073741824</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>1073741824</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>394</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>-1073741515</td>\n",
       "      <td>396</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>397</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>-1073741515</td>\n",
       "      <td>397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>398</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>398</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>400</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>401</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>-1073741816</td>\n",
       "      <td>401</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>402</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>-1073741816</td>\n",
       "      <td>402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>403</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>-1073741816</td>\n",
       "      <td>403</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>404</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>406</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>407</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>407</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>408</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>408</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>409</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>411</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>412</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>412</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>413</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>414</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>415</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>415</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>416</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>416</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>417</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>418</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>419</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>420</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>420</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>421</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>-1073741772</td>\n",
       "      <td>421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>422</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>423</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>424 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     file_id  label   tid  return_value  index\n",
       "0          0      0  2644             0      0\n",
       "1          0      0  2644             0      1\n",
       "2          0      0  2644             0      2\n",
       "3          0      0  2644             0      3\n",
       "4          0      0  2644             0      4\n",
       "5          0      0  2644             0      5\n",
       "6          0      0  2644             0      6\n",
       "7          0      0  2644             0      7\n",
       "8          0      0  2644             0      8\n",
       "9          0      0  2644             0      9\n",
       "10         0      0  2644             0     10\n",
       "11         0      0  2644             0     11\n",
       "12         0      0  2644             0     12\n",
       "13         0      0  2644             0     13\n",
       "14         0      0  2644             0     14\n",
       "15         0      0  2644             0     15\n",
       "16         0      0  2644             0     16\n",
       "17         0      0  2644             0     17\n",
       "18         0      0  2644             0     18\n",
       "19         0      0  2644             0     19\n",
       "20         0      0  2644             0     20\n",
       "21         0      0  2644             0     21\n",
       "22         0      0  2644             0     22\n",
       "23         0      0  2644             0     23\n",
       "24         0      0  2644             0     24\n",
       "25         0      0  2644             0     25\n",
       "26         0      0  2644             0     26\n",
       "27         0      0  2644    1073741824     27\n",
       "28         0      0  2644    1073741824     28\n",
       "29         0      0  2644             0     29\n",
       "..       ...    ...   ...           ...    ...\n",
       "394        0      0  2644             0    394\n",
       "395        0      0  2644             0    395\n",
       "396        0      0  2644   -1073741515    396\n",
       "397        0      0  2644   -1073741515    397\n",
       "398        0      0  2644             0    398\n",
       "399        0      0  2644             0    399\n",
       "400        0      0  2644             0    400\n",
       "401        0      0  2644   -1073741816    401\n",
       "402        0      0  2644   -1073741816    402\n",
       "403        0      0  2644   -1073741816    403\n",
       "404        0      0  2644             0    404\n",
       "405        0      0  2644             0    405\n",
       "406        0      0  2644             0    406\n",
       "407        0      0  2644             0    407\n",
       "408        0      0  2644             0    408\n",
       "409        0      0  2644             0    409\n",
       "410        0      0  2644             0    410\n",
       "411        0      0  2644             0    411\n",
       "412        0      0  2644             0    412\n",
       "413        0      0  2644             0    413\n",
       "414        0      0  2644             0    414\n",
       "415        0      0  2644             0    415\n",
       "416        0      0  2644             0    416\n",
       "417        0      0  2644             0    417\n",
       "418        0      0  2644             0    418\n",
       "419        0      0  2644             0    419\n",
       "420        0      0  2644             0    420\n",
       "421        0      0  2644   -1073741772    421\n",
       "422        0      0  2644             0    422\n",
       "423        0      0  2644             0    423\n",
       "\n",
       "[424 rows x 5 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show every row of `train` whose file_id equals 0.\n",
    "train.loc[train['file_id'].eq(0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-02T16:44:07.657319Z",
     "start_time": "2018-08-02T16:44:00.755444Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       " 0                244952165\n",
       " 1                 54750841\n",
       " 2                 48588373\n",
       " 3221225524         6494839\n",
       "-1073741772         5096935\n",
       " 32                 3721217\n",
       " 4294967295         1921056\n",
       " 259                1698544\n",
       " 16                 1593565\n",
       " 65766              1566489\n",
       " 4                  1462188\n",
       " 13                 1434490\n",
       " 32775              1260090\n",
       " 3221225781         1021003\n",
       " 5                  1002186\n",
       " 32773               915290\n",
       " 12                  896519\n",
       " 6                   838891\n",
       " 10                  666565\n",
       " 19                  638404\n",
       " 3                   630110\n",
       " 3221225480          503686\n",
       " 2147483654          496810\n",
       " 3221225785          462406\n",
       " 3221225530          396803\n",
       " 8208                321874\n",
       " 17                  300858\n",
       " 7                   295772\n",
       " 2147483653          266089\n",
       "-1073741511          254538\n",
       "                    ...    \n",
       " 6423356                  1\n",
       " 5177393                  1\n",
       " 8791647054120            1\n",
       " 11486120                 1\n",
       " 4079658                  1\n",
       " 9651240                  1\n",
       " 189124                   1\n",
       " 52777596                 1\n",
       " 16066675                 1\n",
       " 3485808                  1\n",
       " 16958144                 1\n",
       " 8791610600608            1\n",
       " 79603                    1\n",
       " 1922106464               1\n",
       " 9913512                  1\n",
       " 33360244                 1\n",
       " 6896628                  1\n",
       " 3223795                  1\n",
       " 79602                    1\n",
       " 8464112                  1\n",
       " 8791607190816            1\n",
       " 1954784928               1\n",
       " 4653361                  1\n",
       " 10298736                 1\n",
       " 10900264                 1\n",
       " 1958191136               1\n",
       " 5062516                  1\n",
       " 9389992                  1\n",
       " 6110580                  1\n",
       " 1133955                  1\n",
       "Name: return_value, Length: 2084069, dtype: int64"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count occurrences of each distinct return_value in `train` (most frequent first).\n",
    "train.loc[:, 'return_value'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
